# ---------------------------------------------------------------------------------------------------- #
# PowerDistanceDisputeLevelAnalysis01.R
# ---------------------------------------------------------------------------------------------------- #
#
# Date: 2012-07-20
# Authors: Jonathan Markowitz and Christopher Fariss
#
# All inquires about the models and code should be sent to Chris Fariss 
# Contact: cjf0006@gmail.com
#
# Title: Going the Distance: The Price of Projecting Power
# Most recent version available at: http://ssrn.com/abstract=1961454
#
# Copyright (c) 2012, under the Creative Commons Attribution-Noncommercial-Share Alike 3.0 United States License.
# For more information see: http://creativecommons.org/licenses/by-nc-sa/3.0/us/
# All rights reserved. 
#
# NOTE1: see the paper and PowerDistanceREADME.txt for information on data sources used in this project
#
# NOTE2: run the following R script first: PowerDistanceSystemLevelAnalysis01.R 
#
# R code begins below 
# ---------------------------------------------------------------------------------------------------- #

rm(list = ls())

# ---------------------------------------------------------------------------------------------------- #
# load libraries
# ---------------------------------------------------------------------------------------------------- #
library(simecol); library(gtools); library(geepack); library(plotrix); library(xtable)

# load MID distance data
# this data set is created in the PowerDistanceSystemLevelAnalysis01.R script 
mid <- read.csv("mid_participants_distance_20120627.csv") # now with contiguity indicator from COW
nrow(mid)


# load GDP data sets
gdp <- read.csv("historic_gdp_long_ccode_lags.csv")
nrow(gdp)

#new.gdp <- read.table("expgdp_v5.0.asc", header=TRUE)
#write.csv(new.gdp, "new_gdp.csv")
new.gdp <- read.csv("rgdp_new_lag1.csv")
nrow(new.gdp)

# load CINC material capabilities data
cinc <- read.csv("CINC_lag.csv")
nrow(cinc)

# load state count data
state.count <- read.csv("state_count.csv")
nrow(state.count)

# load polity data 
polity <- read.csv("polity4.csv",na.strings = c("-77", "-88", "-99", "NA"))


# ---------------------------------------------------------------------------------------------------- #
# process data 
# ---------------------------------------------------------------------------------------------------- #
#mid1 <- subset(mid, distance.A == distance.max, select=c(-state.name.A, -state.name.B))
mid1 <- subset(mid, distance.A == distance.max)
nrow(mid1)
#mid2 <- subset(mid, distance.B == distance.max, select=c(-state.name.A, -state.name.B))
mid2 <- subset(mid, distance.B == distance.max)
nrow(mid2)

temp <- mid2$statea
mid2$statea <- mid2$stateb
mid2$stateb <- temp
temp <- mid2$distance.A
mid2$distance.A <- mid2$distance.B
mid2$distance.B <- temp

rm(temp)

mid <- smartbind(mid1, mid2)
nrow(mid)
mid <- subset(mid, distance.A != distance.B)
nrow(mid)
mid <- subset(mid, year>=1870)
nrow(mid)
mid$dyadcode <- 1000*mid$statea + mid$stateb


gdp <- subset(gdp, select=c(#-state,
 -gpd_ir_dolars_1990, -gpd_ir_dolars_1990_lag2))

gdpa <- subset(gdp)
names(gdpa)[4] <- "gdp_lag1_a"
names(gdpa)
nrow(gdpa)

gdpb <- subset(gdp)
names(gdpb)[4] <- "gdp_lag1_b"
names(gdpb)
nrow(gdpb)

ccode_list <- unique(gdpa$ccode)
ccode_list <- na.omit(ccode_list)


new.gdp$rgdp <- new.gdp$pop * new.gdp$rgdpch
new.gdp$rgdp_lag1 <- new.gdp$pop_lag1 * new.gdp$rgdpch_lag1

rgdpa <- subset(new.gdp, select=c(statenum, year, rgdp_lag1))
names(rgdpa) <- c("ccode", "year", "rgdp_lag1_a")
rgdpb <- subset(new.gdp, select=c(statenum, year, rgdp_lag1))
names(rgdpb) <- c("ccode", "year", "rgdp_lag1_b")


cinc <- subset(cinc, year<=2001)

cinca <- subset(cinc, select=c(ccode, year, cinc_lag1))
names(cinca)[3] <- "cinc_lag1_a"
names(cinca)
nrow(cinca)

cincb <- subset(cinc, select=c(ccode, year, cinc_lag1))
names(cincb)[3] <- "cinc_lag1_b"
names(cincb)
nrow(cincb)

politya <- subset(polity, select=c(CCODE, YEAR, POLITY2))
names(politya) <- c("ccode", "year", "polity2a")
names(politya)
nrow(politya)

polityb <- subset(polity, select=c(CCODE, YEAR, POLITY2))
names(polityb) <- c("ccode", "year", "polity2b")
names(polityb)
nrow(polityb)

# ---------------------------------------------------------------------------------------------------- #
# merge data
# ---------------------------------------------------------------------------------------------------- #
data.step.01 <- merge(mid, gdpa, by.x=c("statea", "year"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data.step.01)

data.step.02 <- merge(data.step.01, gdpb, by.x=c("stateb", "year"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data.step.02)

data.step.03 <- merge(data.step.02, cinca, by.x=c("statea", "year"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data.step.03)

data.step.04 <- merge(data.step.03, cincb, by.x=c("stateb", "year"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data.step.04)

data.step.05 <- merge(data.step.04, rgdpa, by.x=c("statea", "year"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data.step.05)

data.step.06 <- merge(data.step.05, rgdpb, by.x=c("stateb", "year"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data.step.06)

data.step.07 <- merge(data.step.06, state.count, by="year", all.x=TRUE, all.y=FALSE)
nrow(data.step.07)


data.step.08 <- merge(data.step.07, politya, by.x=c("statea", "year"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data.step.08)

data.step.09 <- merge(data.step.08, polityb, by.x=c("stateb", "year"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data.step.09)

data.final <- data.step.09



# ---------------------------------------------------------------------------------------------------- #
# detach simecol and load Zelig because of a dependency issue 
# ---------------------------------------------------------------------------------------------------- #
detach(package:simecol)
library(Zelig)

# ---------------------------------------------------------------------------------------------------- #
# Model of Distance to MID with Madison GDP data (1870-2000)
# ---------------------------------------------------------------------------------------------------- #

# model with contiguous disputes excluded 
model <- zelig(log(distance.A) ~ log(distance.B) + log(gdp_lag1_b + 1) + log(gdp_lag1_a + 1) + cinc_lag1_b + cinc_lag1_a + state.count + polity2b + polity2a, data=subset(data.final, conttype >=6), model = "normal.gee", id = "dyadcode", robust=TRUE)
summary(model)$coefficients
xtable(summary(model)$coefficients, digits=3)

# model with all disputes 
model <- zelig(log(distance.A) ~ log(distance.B) + log(gdp_lag1_b + 1) + log(gdp_lag1_a + 1) + cinc_lag1_b + cinc_lag1_a + state.count + polity2b + polity2a, data=data.final, model = "normal.gee", id = "dyadcode", robust=TRUE)
summary(model)$coefficients
xtable(summary(model)$coefficients, digits=3)


# ---------------------------------------------------------------------------------------------------- #
# generate quantities of interest 
# ---------------------------------------------------------------------------------------------------- #
x.high <- setx(model, gdp_lag1_a=quantile(na.omit(data.final$gdp_lag1_a), .75))
x.low <- setx(model, gdp_lag1_a=quantile(na.omit(data.final$gdp_lag1_a), .25))

s.out<-sim(model,x=x.low)
low <- exp(mean(s.out$qi$ev))
low.ci <- exp(quantile(s.out$qi$ev, c(.025, .975)))
round(low)
round(low.ci)

s.out<-sim(model,x=x.high)
high <- exp(mean(s.out$qi$ev))
high.ci <- exp(quantile(s.out$qi$ev, c(.025, .975)))
round(high)
round(high.ci)

fd <- high-low
round(fd)

fd.ci <- high.ci-low.ci 
round(fd.ci)



# ---------------------------------------------------------------------------------------------------- #
# Model of Distance to MID with Gleditsch GDP data (1950-2000)
# ---------------------------------------------------------------------------------------------------- #


# model with contiguous disputes excluded 
model <- zelig(log(distance.A) ~ log(distance.B) + log(rgdp_lag1_b + 1) + log(rgdp_lag1_a + 1) + cinc_lag1_b + cinc_lag1_a + state.count + polity2b + polity2a, data=subset(data.final, conttype>=6), model = "normal.gee", id = "dyadcode", robust=TRUE)
summary(model)$coefficients
xtable(summary(model)$coefficients, digits=3)

# model with all disputes 
model <- zelig(log(distance.A) ~ log(distance.B) + log(rgdp_lag1_b + 1) + log(rgdp_lag1_a + 1) + cinc_lag1_b + cinc_lag1_a + state.count + polity2b + polity2a, data=data.final, model = "normal.gee", id = "dyadcode", robust=TRUE)
summary(model)$coefficients
xtable(summary(model)$coefficients, digits=3)



# ---------------------------------------------------------------------------------------------------- #
# Generate Quantities of Interest 
# ---------------------------------------------------------------------------------------------------- #
x.high <- setx(model, rgdp_lag1_a=quantile(na.omit(data.final$rgdp_lag1_a), .75))
x.low <- setx(model, rgdp_lag1_a=quantile(na.omit(data.final$rgdp_lag1_a), .25))

s.out<-sim(model,x=x.low)
low <- exp(mean(s.out$qi$ev))
low.ci <- exp(quantile(s.out$qi$ev, c(.025, .975)))
round(low)
round(low.ci)

s.out<-sim(model,x=x.high)
high <- exp(mean(s.out$qi$ev))
high.ci <- exp(quantile(s.out$qi$ev, c(.025, .975)))
round(high)
round(high.ci)

fd <- high-low
round(fd)

fd.ci <- high.ci-low.ci 
round(fd.ci)


x.high <- setx(model, rgdp_lag1_a=quantile(na.omit(data.final$rgdp_lag1_a), .95))
x.low <- setx(model, rgdp_lag1_a=quantile(na.omit(data.final$rgdp_lag1_a), .75))

s.out<-sim(model,x=x.low)
low <- exp(mean(s.out$qi$ev))
low.ci <- exp(quantile(s.out$qi$ev, c(.025, .975)))
round(low)
round(low.ci)

s.out<-sim(model,x=x.high)
high <- exp(mean(s.out$qi$ev))
high.ci <- exp(quantile(s.out$qi$ev, c(.025, .975)))
round(high)
round(high.ci)

fd <- high-low
round(fd)

fd.ci <- high.ci-low.ci 
round(fd.ci)

x.high <- setx(model, rgdp_lag1_a=quantile(na.omit(data.final$rgdp_lag1_a), .99))
x.low <- setx(model, rgdp_lag1_a=quantile(na.omit(data.final$rgdp_lag1_a), .95))

s.out<-sim(model,x=x.low)
low <- exp(mean(s.out$qi$ev))
low.ci <- exp(quantile(s.out$qi$ev, c(.025, .975)))
round(low)
round(low.ci)

s.out<-sim(model,x=x.high)
high <- exp(mean(s.out$qi$ev))
high.ci <- exp(quantile(s.out$qi$ev, c(.025, .975)))
round(high)
round(high.ci)

fd <- high-low
round(fd)

fd.ci <- high.ci-low.ci 
round(fd.ci)



# ---------------------------------------------------------------------------------------------------- #
# End of File
# ---------------------------------------------------------------------------------------------------- #

